
/*
Filename: A1_CreateHolding.do
Goal: Construct a comprehensive database tracking institutional holdings in corporate bonds and equities on a monthly basis.
Contact: mjha@gsu.edu (author) and gormley@wustl.edu

Data Sources
All datasets listed below are proprietary and accessible through WRDS.

1. Portfolio Holdings Data
File: CRSPPortfolioHold.dta
Source Path: WRDS → CRSP Quarterly Update → Mutual Funds → Portfolio Holdings
Description: Core holdings data for mutual fund portfolios

2. Security Identifier Linking Files
Equity Links
Files: Cusip_Permco.dta, Ticker_Permco.dta
Source Path: WRDS → CRSP → Annual Update → CRSP/Compustat Merged → Compustat CRSP Link
Purpose: Maps CUSIP and ticker identifiers to PERMCO (permanent company identifiers) for equity securities

Bond Links
File: Cusip_Permco_BondLink.dta
Source Path: WRDS → Linking Queries by WRDS → Bond CRSP Link
Purpose: Links bond CUSIPs to company identifiers

3. Fund-Portfolio Mapping
File: crsp_fund_port_wrds.dta
Source Path: CRSP → Quarterly Update → Mutual Funds → Fund-Portfolio Map
Description: Contains CRSP fund numbers and corresponding portfolio identifiers

4. Fund Summary Information
File: crsp-quarterly-summary.dta
Source Path: CRSP Quarterly Update → Mutual Funds → CRSP Mutual Funds → Summary
Description: Summary-level characteristics and metadata for mutual funds
*/

capture log close
capture restore
set more off
clear all

global time = "month"
global institution = "parent_name"
global firm = "permco" 


***********************************************************************
*** Part 1: Prepare Holding data from WRDS
	
/* 1. Prepare Data */
use "CRSPPortfolioHold_pseudo.dta", clear 
keep crsp_portno report_dt market_val security_name cusip $firm ticker maturity_dt cusip 
gen $time = mofd(report_dt)

/* 2. Add PERMCO */
rename $firm permco_temp
foreach var in 9 8{ 
	gen cusip_all = substr(cusip,1,`var')
	merge m:1 cusip_all using "Cusip_Permco_pseudo.dta" 
	drop if _merge == 2
	drop _merge
	replace permco_temp = $firm if missing(permco_temp)
	drop $firm cusip_all
}

merge m:1 ticker using "Ticker_Permco_pseudo.dta" 
drop if _merge == 2
replace permco_temp = $firm if missing(permco_temp) 
drop $firm _merge

/* 3. Tag Bond Security - we use two methods */
gen bond_security = 0 // bond security identifier

// 3.1 bondlink has only bond
foreach var in 9 8{ 
	gen cusip_all = substr(cusip,1,`var')
	merge m:1 cusip_all using "Cusip_Permco_Bondlink_pseudo.dta" 
	drop if _merge == 2
	replace bond_security = 1 if _merge == 3
	drop _merge
	drop $firm cusip_all
}
rename permco_temp $firm
drop if missing($firm)  

// 3.2 maturity or number and %, ., -, /, or > 2000
gen number_in_security_name = real(regexs(1)) if regexm(security_name,"([0-9]+)")
replace bond_security = 1 if !missing(maturity_dt) | (!missing(number_in_security_name) & (strpos(security_name, "%") | strpos(security_name, "/") | strpos(security_name, ".") | strpos(security_name, "-") | number_in_security_name > 2000))

keep $time crsp_portno $firm market_val bond_security

/* 4. Exclude positions with a negative value  */
gen double temp = round(market_val) 
drop market_val
rename temp market_val	
replace market_val = 0 if market_val < 0 

/* 4. Keep Unique */
gsort crsp_portno $firm $time bond_security
quietly by crsp_portno $firm $time bond_security:  gen dup = cond(_N==1,0,_n)

egen double temp = sum(market_val), by(crsp_portno $firm $time bond_security)
replace market_val = temp
drop if dup > 1
drop temp dup

save "A_CRSP.dta", replace 
// we have to take care of repititive reporting i.e. some of the month portno do not report at all
replace $time = $time + 1 
save "B_CRSP.dta", replace
replace $time = $time + 1
save "C_CRSP.dta", replace


************************************ 
use "crsp_fund_port_wrds.dta", clear

gen month = mofd(begdt)
sort crsp_fundno crsp_portno
quietly by crsp_fundno crsp_portno:  gen dup = cond(_N==1,0,_n)
drop if dup > 1 
drop dup
quietly by crsp_fundno:  gen dup = cond(_N==1,0,_n)

tsset crsp_fundno month  
tsfill, full

ssc install carryforward
bysort crsp_fundno: carryforward crsp_portno, gen(crsp_portno_new)

gsort crsp_fundno -month
bysort crsp_fundno: carryforward crsp_portno_new, gen(crsp_portno_final)
keep month crsp_fundno crsp_portno_final
save "crsp_fund_port.dta", replace 



***** Part 2: Add holding data to CRSP Quartery Summary
use "crsp-quarterly-summary_pseudo.dta", clear
//assign parent_name by running A2_AssignParentNameHolding


******** add flags
global institution = "parent_name"
global time = "month"
keep crsp_fundno caldt crsp_portno fund_name ticker index_fund_flag $institution
/* unique*/
drop if caldt < mdy(06, 01, 2007) 
gen month = mofd(caldt)
duplicates drop crsp_fundno $time, force 

/* Flag passive funds */
gen passive = 0
gen tmp = upper(fund_name)
foreach word in BLOOMBERG DJ DOW FTSE IDX "IND " INDEX INDX KBW MORNINGSTAR MSCI NASDAQ NYSE RUSSELL "S&P" "S & P" "SANDP" "S AND P" SP STOXX WILSHIRE "100" "400" "500" "600" "900" "1000" "1500" "2000" "5000" { 
	replace passive = 1 if strpos(tmp, "`word'")
} 
drop tmp
replace passive = 1 if !missing(index_fund_flag) 
keep $time crsp_fundno crsp_portno $institution passive
sort crsp_fundno month

tsset crsp_fundno month 
tsfill

**** add missing portno
merge 1:1 crsp_fundno month using "crsp_fund_port.dta" 
drop if _merge==2
drop _merge
replace crsp_portno = crsp_portno_final if missing(crsp_portno) 
drop crsp_portno_final


/* fill missing based on fundno */
sort crsp_fundno $time
foreach var in crsp_portno $institution passive {
	bysort crsp_fundno: carryforward `var', gen(`var'_new) 
	drop `var'
	rename `var'_new `var'
}
gsort crsp_fundno -$time
foreach var in crsp_portno $institution passive { 
	bysort crsp_fundno: carryforward `var', gen(`var'_new) 
	drop `var'
	rename `var'_new `var'
}

// unique
gsort crsp_portno $time
quietly by crsp_portno $time:  gen dup = cond(_N==1,0,_n)
	
drop if dup > 1 // keep one observation per portno_month
drop crsp_fundno dup 

/* fill missing - based on fundno */
tsset crsp_portno $time
tsfill 

sort crsp_portno $time
foreach var in $institution passive { 
	bysort crsp_portno: carryforward `var', gen(`var'_new) 
	drop `var'
	rename `var'_new `var'
}
gsort crsp_portno -$time
foreach var in $institution passive { 
	bysort crsp_portno: carryforward `var', gen(`var'_new) 
	drop `var'
	rename `var'_new `var'
}
drop if missing(crsp_portno) | missing($time) | missing($institution)
sort crsp_portno $time

**** start adding holding data
merge 1:m crsp_portno $time using "A_CRSP.dta" 
preserve

drop if _merge < 3
drop _merge
save "A_CRSPMerge.dta", replace

restore
drop if _merge > 1
drop _merge $firm bond_security market_val _merge
merge 1:m crsp_portno $time using "B_CRSP.dta" 
preserve
drop if _merge < 3
drop _merge
save "B_CRSPMerge.dta", replace

restore
drop if _merge > 1
drop _merge $firm bond_security market_val _merge
merge 1:m crsp_portno $time using "C_CRSP.dta" 
drop if _merge < 3
drop _merge
save "C_CRSPMerge.dta", replace
append using "B_CRSPMerge.dta"
append using "A_CRSPMerge.dta"


/* Identify BondFund if the portfolio do not have any equity */
gsort crsp_portno $time bond_security

egen double temp = sum(market_val) if bond_security == 0, by(crsp_portno $time)
egen double portno_equity = mean(temp), by(crsp_portno $time)
replace portno_equity  = 0 if missing(portno_equity)
drop temp	

gen bondfundinv = 0
replace bondfundinv = 1 if portno_equity  == 0
drop portno_equity

/* Create Final Variables  */
drop if missing($institution) | missing($time) | missing($firm)
gsort $institution $time $firm
quietly by $institution $time $firm:  gen dup = cond(_N==1,0,_n)

egen double  mv_total = sum(market_val), by($institution $time $firm)
	
egen double temp = sum(market_val) if bond_security == 1, by($institution $time $firm)
egen double mv_bond = mean(temp), by($institution $time $firm)
replace mv_bond = 0 if missing(mv_bond)
drop temp	

/* bondfund */
egen double  temp = sum(market_val) if bond_security == 1 & bondfundinv == 1, by($institution $time $firm)
egen double  mv_bond_bf = mean(temp), by($institution $time $firm)
replace mv_bond_bf = 0 if missing(mv_bond_bf)
drop temp

egen double  temp = sum(market_val) if bond_security == 0 & bondfundinv == 1, by($institution $time $firm)
egen double  mv_equity_bf = mean(temp), by($institution $time $firm)
replace mv_equity_bf = 0 if missing(mv_equity_bf)
drop temp

	/* passive and bond_fund interaction */
egen double  temp = sum(market_val) if bond_security == 1 & bondfundinv == 1 & passive == 1, by($institution $time $firm)
egen double  mv_bond_bf_pa = mean(temp), by($institution $time $firm)
replace mv_bond_bf_pa = 0 if missing(mv_bond_bf_pa)
drop temp

egen double  temp = sum(market_val) if bond_security == 0 & bondfundinv == 1 & passive == 1, by($institution $time $firm)
egen double  mv_equity_bf_pa = mean(temp), by($institution $time $firm)
replace mv_equity_bf_pa = 0 if missing(mv_equity_bf_pa)
drop temp

/* Total Net Asset of Portfolio */
egen double tna = sum(market_val), by($institution $time)

/* keep unique */
drop if dup > 1 
keep $institution $time $firm mv* tna*
save "HoldingFromCRSP.dta", replace
